Data source: www.covidtracking.com
library(tidyverse)
library(themebg)
# Per-state daily records. The `date` column is converted to text and parsed
# with the "%Y%m%d" format; `str_date` keeps a character copy of the parsed
# date for use as the plotly x value.
raw_state <- read_csv("https://covidtracking.com/api/states/daily.csv") %>%
  mutate(
    date = parse_date(as.character(date), format = "%Y%m%d"),
    str_date = as.character(date)
  ) %>%
  arrange(date, state)
# State metadata; only `state` and `name` are kept so that `name` can be
# attached to the daily data via a join on `state`.
raw_state_info <- read_csv("https://covidtracking.com/api/states/info.csv")
df_state_info <- select(raw_state_info, state, name)
# National daily records, with `date` parsed from its "%Y%m%d" text form and
# a character copy of the parsed date stored in `str_date`.
raw_usa <- read_csv("https://covidtracking.com/api/us/daily.csv") %>%
  mutate(
    date = parse_date(as.character(date), format = "%Y%m%d"),
    str_date = as.character(date)
  )
# Lockdown dates from a local CSV. The file's first line is skipped and the
# columns are renamed explicitly; `lockdown_date` is parsed as day/month/year.
raw_lockdown <- read_csv(
  "../data/raw/kaggle_jcyzag/countryLockdowndates.csv",
  skip = 1,
  col_names = c("country", "name", "lockdown_date", "lockdown", "reference")
) %>%
  mutate(lockdown_date = parse_date(lockdown_date, format = "%d/%m/%Y"))

# Columns needed to join lockdown information onto the state data by `name`.
df_state_lockdown <- select(raw_lockdown, name, lockdown_date, lockdown)
# National series sorted by date, with derived ratios and day-over-day
# changes of the daily increases.
df_usa <- arrange(raw_usa, date) %>%
  # Ratios between the reported counts.
  mutate(
    death_positive = death / positive,
    death_hospital = death / hospitalized,
    death_icu = death / inIcuCumulative,
    death_vent = death / onVentilatorCumulative,
    hospital_positive = hospitalized / positive
  ) %>%
  # Change in the daily increase versus the previous row, in absolute terms
  # and as a percentage of the current row's increase.
  mutate(
    pos_inc_chg = positiveIncrease - lag(positiveIncrease),
    death_inc_chg = deathIncrease - lag(deathIncrease),
    pos_inc_chg_pct = 100 * (pos_inc_chg / positiveIncrease),
    death_inc_chg_pct = 100 * (death_inc_chg / deathIncrease)
  )
# Per-state daily data with derived ratios, the state's `name`, and its
# lockdown date/type.
#
# The ratios are element-wise, so no grouping is needed: grouping by
# (state, date) only created one group per row and slowed the mutate down
# without changing any value.
df_state <- raw_state %>%
  mutate(
    death_positive = death / positive,
    death_hospital = death / hospitalized,
    death_icu = death / inIcuCumulative,
    death_vent = death / onVentilatorCumulative,
    hospital_positive = hospitalized / positive
  ) %>%
  left_join(df_state_info, by = "state") %>%
  # NOTE(review): this join assumes `name` values in the lockdown file match
  # the names in the state info table and are unique per state -- confirm,
  # since duplicates would fan out the daily rows.
  left_join(df_state_lockdown, by = "name") %>%
  mutate(lockdown = as_factor(lockdown))
# Daily totals for NY versus all other states combined (`is_ny`), with
# day-over-day changes and deaths per positive case.
#
# `lag()` is ordered explicitly by `date` so the day-over-day differences do
# not depend on the incoming row order, and the result is ungrouped so later
# verbs do not silently operate per `is_ny` group.
# The `_at` helper is kept (rather than `across()`) to stay compatible with
# the dplyr version the rest of this file is written against.
df_us_ny <- raw_state %>%
  mutate(is_ny = state == "NY") %>%
  group_by(date, is_ny) %>%
  summarize_at(c("positive", "negative", "death"), sum, na.rm = TRUE) %>%
  mutate(tests = positive + negative) %>%
  group_by(is_ny) %>%
  mutate(
    positive_change = positive - lag(positive, order_by = date),
    negative_change = negative - lag(negative, order_by = date),
    death_change = death - lag(death, order_by = date),
    tests_change = tests - lag(tests, order_by = date),
    death_positive = death / positive
  ) %>%
  ungroup()
library(plotly)
# raw_usa %>%
# ggplot(aes(x = date, y = positive)) +
# geom_line() +
# theme_bg() +
# theme(legend.position = "none")
# Line trace of `positive` against `str_date` for df_usa; legend hidden.
plot_ly(df_usa, x = ~str_date, y = ~positive) %>%
  add_lines() %>%
  layout(showlegend = FALSE)
Positive cases in USA
# Bar trace of `positiveIncrease` against `str_date` for df_usa; legend hidden.
# (Swap add_bars() for add_lines() to draw a line instead.)
plot_ly(df_usa, x = ~str_date, y = ~positiveIncrease) %>%
  add_bars() %>%
  layout(showlegend = FALSE)
Number of new positive cases each day in USA
# Line trace of `pos_inc_chg` against `str_date` for df_usa; legend hidden.
# (Swap add_lines() for add_bars() to draw bars instead.)
plot_ly(df_usa, x = ~str_date, y = ~pos_inc_chg) %>%
  add_lines() %>%
  layout(showlegend = FALSE)
Rate of change of new positive cases each day
# Percent change in new positive cases with a fitted linear trend.
#
# The model and the plot share one data frame restricted to rows with a
# finite percentage and a non-missing date. This guarantees fitted(l) has
# exactly one value per plotted row, and it drops +/-Inf values (produced
# when the day's increase is 0), which would otherwise make lm() stop with
# "NA/NaN/Inf in 'y'".
df_pos_chg_pct <- df_usa %>%
  filter(is.finite(pos_inc_chg_pct), !is.na(date))

l <- lm(pos_inc_chg_pct ~ date, data = df_pos_chg_pct)

df_pos_chg_pct %>%
  plot_ly(x = ~str_date) %>%
  add_lines(y = ~pos_inc_chg_pct) %>%
  add_lines(y = fitted(l)) %>%
  layout(showlegend = FALSE)
Rate of change of new positive cases each day as percent of new positive cases
# Line trace of `death` against `str_date` for df_usa; legend hidden.
plot_ly(df_usa, x = ~str_date, y = ~death) %>%
  add_lines() %>%
  layout(showlegend = FALSE)
Cumulative deaths in USA
# Bar trace of `deathIncrease` against `str_date` for df_usa; legend hidden.
plot_ly(df_usa, x = ~str_date, y = ~deathIncrease) %>%
  add_bars() %>%
  layout(showlegend = FALSE)
Number of new deaths each day in USA
# Line trace of `death_inc_chg` against `str_date` for df_usa; legend hidden.
# (Swap add_lines() for add_bars() to draw bars instead.)
plot_ly(df_usa, x = ~str_date, y = ~death_inc_chg) %>%
  add_lines() %>%
  layout(showlegend = FALSE)
Rate of change of new deaths each day
# Percent change in new deaths with a fitted linear trend.
#
# The model and the plot share one data frame restricted to rows with a
# finite percentage and a non-missing date. This guarantees fitted(l) has
# exactly one value per plotted row, and it drops +/-Inf values (produced
# when the day's increase is 0), which would otherwise make lm() stop with
# "NA/NaN/Inf in 'y'".
df_death_chg_pct <- df_usa %>%
  filter(is.finite(death_inc_chg_pct), !is.na(date))

l <- lm(death_inc_chg_pct ~ date, data = df_death_chg_pct)

df_death_chg_pct %>%
  plot_ly(x = ~str_date) %>%
  add_lines(y = ~death_inc_chg_pct) %>%
  add_lines(y = fitted(l)) %>%
  layout(showlegend = FALSE)
Rate of change of new deaths each day as percent of new deaths
# Line trace of `death_positive` (death / positive) against `str_date` for
# df_usa; legend hidden.
plot_ly(df_usa, x = ~str_date, y = ~death_positive) %>%
  add_lines() %>%
  layout(showlegend = FALSE)
Deaths per positive cases
# Line trace of `hospitalized` against `str_date` for df_usa; legend hidden.
plot_ly(df_usa, x = ~str_date, y = ~hospitalized) %>%
  add_lines() %>%
  layout(showlegend = FALSE)
Cumulative hospitalizations
# Bar trace of `hospitalizedIncrease` against `str_date` for df_usa; legend
# hidden.
plot_ly(df_usa, x = ~str_date, y = ~hospitalizedIncrease) %>%
  add_bars() %>%
  layout(showlegend = FALSE)
Number of new hospitalizations each day
# Line trace of `hospital_positive` (hospitalized / positive) against
# `str_date` for df_usa; legend hidden.
plot_ly(df_usa, x = ~str_date, y = ~hospital_positive) %>%
  add_lines() %>%
  layout(showlegend = FALSE)
Hospitalizations per positive cases
# Line trace of `death_hospital` (death / hospitalized) against `str_date`
# for df_usa; legend hidden.
plot_ly(df_usa, x = ~str_date, y = ~death_hospital) %>%
  add_lines() %>%
  layout(showlegend = FALSE)
Deaths per hospitalized cases
# One line per state of `positive` against `str_date`, coloured with the
# "Paired" palette; legend hidden.
plot_ly(
  df_state,
  x = ~str_date,
  y = ~positive,
  color = ~state,
  colors = "Paired"
) %>%
  add_lines() %>%
  layout(showlegend = FALSE)
Positive cases by state
# Static ggplot of `positive` over `date`, one coloured line per state, with
# rows for state "NY" excluded; legend hidden.
ggplot(
  filter(df_state, state != "NY"),
  aes(x = date, y = positive, color = state)
) +
  geom_line() +
  theme_bg() +
  theme(legend.position = "none")
# Bars of `positiveIncrease` against `str_date`, coloured by state with the
# "Paired" palette; legend hidden.
plot_ly(
  df_state,
  x = ~str_date,
  y = ~positiveIncrease,
  color = ~state,
  colors = "Paired"
) %>%
  add_bars() %>%
  layout(showlegend = FALSE)
Number of new positive cases each day by state
# One line per state of `death` against `str_date`, coloured with the
# "Paired" palette; legend hidden.
plot_ly(
  df_state,
  x = ~str_date,
  y = ~death,
  color = ~state,
  colors = "Paired"
) %>%
  add_lines() %>%
  layout(showlegend = FALSE)
Number of deaths by state
# Bars of `deathIncrease` against `str_date`, coloured by state with the
# "Paired" palette; legend hidden.
plot_ly(
  df_state,
  x = ~str_date,
  y = ~deathIncrease,
  color = ~state,
  colors = "Paired"
) %>%
  add_bars() %>%
  layout(showlegend = FALSE)
Number of new deaths each day by state
# One line per state of `death_positive` (death / positive) against
# `str_date`, coloured with the "Paired" palette; legend hidden.
plot_ly(
  df_state,
  x = ~str_date,
  y = ~death_positive,
  color = ~state,
  colors = "Paired"
) %>%
  add_lines() %>%
  layout(showlegend = FALSE)
Deaths per positive cases by state
# One line per state of `hospitalized` against `str_date`, coloured with the
# "Paired" palette; legend hidden.
plot_ly(
  df_state,
  x = ~str_date,
  y = ~hospitalized,
  color = ~state,
  colors = "Paired"
) %>%
  add_lines() %>%
  layout(showlegend = FALSE)
Number of hospitalizations by state
# Bars of `hospitalizedIncrease` against `str_date`, coloured by state with
# the "Paired" palette; legend hidden.
plot_ly(
  df_state,
  x = ~str_date,
  y = ~hospitalizedIncrease,
  color = ~state,
  colors = "Paired"
) %>%
  add_bars() %>%
  layout(showlegend = FALSE)
Number of new hospitalizations each day by state
# One line per state of `hospital_positive` (hospitalized / positive) against
# `str_date`, coloured with the "Paired" palette; legend hidden.
plot_ly(
  df_state,
  x = ~str_date,
  y = ~hospital_positive,
  color = ~state,
  colors = "Paired"
) %>%
  add_lines() %>%
  layout(showlegend = FALSE)
Hospitalizations per positive case by state
# One line per state of `death_hospital` (death / hospitalized) against
# `str_date`, coloured with the "Paired" palette; legend hidden.
plot_ly(
  df_state,
  x = ~str_date,
  y = ~death_hospital,
  color = ~state,
  colors = "Paired"
) %>%
  add_lines() %>%
  layout(showlegend = FALSE)
Deaths per hospitalized case by state
# Daily new positives per state (one facet each, free y scale) with a
# vertical marker at the state's lockdown date; the marker's linetype
# encodes the lockdown type.
#
# The marker layer gets its own data: one row per distinct
# (state, lockdown_date, lockdown) with a non-missing date. Inheriting the
# full daily data would draw the same line once per state-day row and warn
# about every row that has no lockdown date.
ggplot(df_state, aes(x = date, y = positiveIncrease)) +
  geom_col() +
  geom_vline(
    data = df_state %>%
      distinct(state, lockdown_date, lockdown) %>%
      filter(!is.na(lockdown_date)),
    aes(xintercept = lockdown_date, linetype = lockdown)
  ) +
  facet_wrap(~ state, scales = "free_y") +
  theme_bg() +
  theme(legend.position = "none")
Increase in daily cases after lockdown by state
# `positive` over `date`, one line per `is_ny` value.
ggplot(df_us_ny, aes(x = date, y = positive, color = is_ny)) +
  geom_line() +
  theme_bg()
Number of positive cases in NY vs. rest of USA
# `negative` over `date`, one line per `is_ny` value.
ggplot(df_us_ny, aes(x = date, y = negative, color = is_ny)) +
  geom_line() +
  theme_bg()
Number of negative tests in NY vs. rest of USA
# `tests` (positive + negative) over `date`, one line per `is_ny` value.
ggplot(df_us_ny, aes(x = date, y = tests, color = is_ny)) +
  geom_line() +
  theme_bg()
Number of tests performed in NY vs. rest of USA
# `death` over `date`, one line per `is_ny` value.
ggplot(df_us_ny, aes(x = date, y = death, color = is_ny)) +
  geom_line() +
  theme_bg()
Number of deaths in NY vs. rest of USA
# Day-over-day `positive_change` over `date`: a smoother drawn first, with
# the raw points on top, coloured by `is_ny`.
ggplot(df_us_ny, aes(x = date, y = positive_change, color = is_ny)) +
  geom_smooth() +
  geom_point() +
  theme_bg()
Number of new positive cases each day in NY vs. rest of USA
# Day-over-day `death_change` over `date`: a smoother drawn first, with the
# raw points on top, coloured by `is_ny`.
ggplot(df_us_ny, aes(x = date, y = death_change, color = is_ny)) +
  geom_smooth() +
  geom_point() +
  theme_bg()
Number of new deaths each day in NY vs. rest of USA
# `death_positive` (death / positive) over `date`, one line per `is_ny`
# value, restricted to rows where the ratio is greater than 0.
ggplot(
  filter(df_us_ny, death_positive > 0),
  aes(x = date, y = death_positive, color = is_ny)
) +
  geom_line() +
  theme_bg()
Number of deaths per positive cases in NY vs. rest of USA